package su.theravada.jpalireader.util;

import java.util.Arrays;
import java.util.HashSet;
import java.util.List;

/**
 * Static helpers for diacritic-insensitive handling of romanised Pali text:
 * a stop-word list, a diacritic stripper and a converter that turns a simple
 * search pattern into a regular expression matching every diacritic variant
 * of each letter.
 */
public class PaliUtil {
    /**
     * Letters with diacritics recognised by {@code ConvertDiac}. Parallel to
     * {@code arrReplace}: the character at index {@code i} here is replaced by
     * the character at index {@code i} there.
     */
    private static final char[] arrFind = new char[] {
        'ā', 'ī', 'ū', 'ṭ', 'ñ', 'ṃ', 'ṇ', 'ṅ', 'ḷ', 'ḍ',
        'Ā', 'Ī', 'Ū', 'Ṭ', 'Ñ', 'Ṃ', 'Ṇ', 'Ṅ', 'Ḷ', 'Ḍ' };

    /** Plain ASCII replacements, index-aligned with {@code arrFind}. */
    private static final char[] arrReplace = new char[] {
        'a', 'i', 'u', 't', 'n', 'm', 'n', 'n', 'l', 'd',
        'A', 'I', 'U', 'T', 'N', 'M', 'N', 'N', 'L', 'D' };

    /** Words returned by {@code getPaliStopWords}. */
    private static final String[] arrPaliStopWords = new String[] {
        "ca", "va", "vā", "pi", "kho", "yo", "so", "taṃ", "evaṃ" };

    /**
     * Returns the Pali stop words.
     *
     * @return a new, independent {@code HashSet} on every call, so the caller
     *         may modify it freely
     */
    public static HashSet<String> getPaliStopWords() {
        List<String> lstStopWords = Arrays.asList(arrPaliStopWords);
        return new HashSet<String>(lstStopWords);
    }

    /**
     * Replaces every diacritic letter listed in {@code arrFind} with its plain
     * ASCII counterpart, preserving case. All other characters are copied
     * unchanged, so the result always has the same length as the input.
     *
     * @param strInput text to convert; must not be {@code null}
     * @return the text with the known diacritics removed
     */
    public static String ConvertDiac(String strInput) {
        char[] arrChars = strInput.toCharArray();
        for (int i = 0; i < arrChars.length; i++) {
            arrChars[i] = stripDiacritic(arrChars[i]);
        }
        return new String(arrChars);
    }

    /**
     * Converts a search pattern into a regular expression that ignores case
     * and diacritics of the pattern's letters.
     *
     * <p>By default the match is exact: the result is wrapped in {@code \A}
     * and {@code \z}. A leading {@code *} drops the {@code \A} anchor and an
     * unescaped trailing {@code *} drops the {@code \z} anchor, so surrounding
     * the pattern with asterisks finds it anywhere.
     *
     * <p>Each remaining character is stripped of its diacritic and
     * lower-cased; if it is one of {@code a i u t n m l d} it is replaced by a
     * character class listing all its variants (for example {@code [aāâ]}).
     * A backslash and the single character following it are copied verbatim,
     * which keeps regular-expression escapes such as anchors intact.
     *
     * @param strPattern the user pattern; must not be {@code null}
     * @return the regular expression source text
     */
    public static String ConvertToRegex(String strPattern) {
        // Default is exact match; surround with asterisks to find all.
        if (strPattern.startsWith("*")) {
            strPattern = strPattern.substring(1);
        } else {
            strPattern = "\\A" + strPattern;
        }

        int iLast = strPattern.length() - 1;
        // A trailing "\*" is a literal asterisk, not the wildcard marker;
        // stripping it would leave a dangling backslash.
        if (strPattern.endsWith("*") && !isEscaped(strPattern, iLast)) {
            strPattern = strPattern.substring(0, iLast);
        } else {
            strPattern += "\\z";
        }

        StringBuilder sbRetVal = new StringBuilder(strPattern.length() * 4);
        boolean bEscaped = false;

        for (int j = 0; j < strPattern.length(); j++) {
            char chOriginal = strPattern.charAt(j);

            if (bEscaped) {
                // Character belongs to an escape sequence: keep it untouched.
                sbRetVal.append(chOriginal);
                bEscaped = false;
                continue;
            }
            if (chOriginal == '\\') {
                sbRetVal.append(chOriginal);
                bEscaped = true;
                continue;
            }

            // Character.toLowerCase is locale-independent and maps one char to
            // one char, unlike String.toLowerCase() which depends on the
            // default locale (e.g. Turkish dotless i) and may change length.
            char chBase = Character.toLowerCase(stripDiacritic(chOriginal));
            String strClass = regexClassFor(chBase);
            if (strClass != null) {
                sbRetVal.append('[').append(strClass).append(']');
            } else {
                sbRetVal.append(chBase);
            }
        }

        return sbRetVal.toString();
    }

    /** Returns the ASCII replacement for a known diacritic letter, or {@code ch} itself. */
    private static char stripDiacritic(char ch) {
        for (int i = 0; i < arrFind.length; i++) {
            if (arrFind[i] == ch) {
                return arrReplace[i];
            }
        }
        return ch;
    }

    /**
     * Returns the members of the character class that stands for the given
     * lower-case base letter, or {@code null} if the letter has no variants.
     */
    private static String regexClassFor(char chBase) {
        switch (chBase) {
            case 'a': return "aāâ";
            case 'i': return "iīî";
            case 'u': return "uūû";
            case 't': return "tṭ";
            case 'n': return "nṇṅñ";
            case 'm': return "mṃ";
            case 'l': return "lḷ";
            case 'd': return "dḍ";
            default:  return null;
        }
    }

    /**
     * Tells whether the character at {@code iIndex} is preceded by an odd
     * number of consecutive backslashes, i.e. is itself escaped.
     */
    private static boolean isEscaped(String str, int iIndex) {
        int iBackslashes = 0;
        for (int i = iIndex - 1; i >= 0 && str.charAt(i) == '\\'; i--) {
            iBackslashes++;
        }
        return (iBackslashes % 2) == 1;
    }

}
